# Load dependencies with library() so that a missing package stops the script
# right here; require() would only warn, return FALSE, and let the script fail
# later at the first use of a function or object from the package.
library(stringi)
library(dichromat)  # provides the `colorschemes` list used on the next line
# 12-colour categorical palette; elements of `color` are combined with an
# alpha suffix ("B0") wherever points are drawn.
color <- colorschemes$Categorical.12

# Input data, read from the current working directory.
LDP.data.Study2 <- read.csv("LDP_data_Study2.csv")
ideal.point.data <- read.csv("ideal_point_data.csv")

#### ideal point plot (Online Appendix F.2) ####
# Work on a copy so that the relabelling below does not touch ideal.point.data.
ideal.point.data.plot <- ideal.point.data
for (election in 43:47) {
  # Names flagged as top leaders (top.leader == 1) in this electoral term
  leader.rows <- LDP.data.Study2$elec.term == election &
    LDP.data.Study2$top.leader == 1
  leader.names <- unique(LDP.data.Study2$mp_name[leader.rows])

  # Give those MPs their own party label so they can be drawn separately
  relabel.rows <- ideal.point.data.plot$elec.term == election &
    ideal.point.data.plot$name %in% leader.names
  ideal.point.data.plot$party[relabel.rows] <- "LDP.leader"
}

cairo_pdf("Figure_A1.pdf", width = 5, height = 7.5, pointsize = 9)
layout(matrix(1:6, 3, 2, byrow = TRUE))
par(mar = c(2, 2, 3, 0.5), pty = "s", lwd = 0.5)

# One entry per party group, in drawing order (later groups are drawn on top).
# `args` holds exactly the graphical arguments handed to points().
fig.A1.styles <- list(
  list(party = "LDP",
       args = list(pch = 19, col = paste0(color[12], "B0"))),
  list(party = "DPJ",
       args = list(pch = 15, col = paste0(color[10], "B0"))),
  list(party = "CGP",
       args = list(pch = 17, col = paste0(color[2], "B0"))),
  list(party = c("JCP", "SDP"),
       args = list(pch = 4, col = "#080808B0")),
  list(party = c("JRP", "JIP", "YP"),
       args = list(pch = 18, col = paste0(color[6], "B0"), cex = 1.2)),
  list(party = "LDP.leader",
       args = list(pch = 21, col = paste0(color[12], "B0"), bg = "white"))
)
# Panel titles for electoral terms 43..47
fig.A1.years <- c(2003, 2005, 2009, 2012, 2014)

for (i in 43:47) {
  # Empty panel; axes are added after the points
  plot(NULL, NULL, type = "n", bty = "n",
       xlim = c(-4, 3), ylim = c(-3.5, 3.5),
       main = fig.A1.years[i - 42],
       xlab = "", ylab = "", xaxt = "n", yaxt = "n", cex.main = 2)
  for (fig.A1.style in fig.A1.styles) {
    # Rows of this term belonging to the group (rows with NA are skipped)
    fig.A1.rows <- which(ideal.point.data.plot$elec.term == i &
                           ideal.point.data.plot$party %in% fig.A1.style$party)
    do.call(points,
            c(list(ideal.point.data.plot$theta.1[fig.A1.rows],
                   ideal.point.data.plot$theta.2[fig.A1.rows]),
              fig.A1.style$args))
  }
  axis(1, lwd = 0.5)
  axis(2, lwd = 0.5)
}

# The sixth layout cell holds only the legend
plot(NULL, NULL, type = "n", bty = "n", xlim = c(0, 1), ylim = c(0, 1),
     xlab = "", ylab = "", xaxt = "n", yaxt = "n")
legend("top",
       legend = c("LDP", "DPJ", "Komeito", "JCP/SDP", "JRP/JIP/YP"),
       col = c(paste0(color[12], "B0"),
               paste0(color[10], "B0"),
               paste0(color[2], "B0"),
               "#080808B0",
               paste0(color[6], "B0")),
       pch = c(19, 15, 17, 4, 18),
       bty = "n", cex = 2,
       pt.cex = c(rep(1.2, 4), 1.44))
dev.off()

#### validation: Twitter-based ideal points (Online Appendix F.2) ####
Twitter.data <- read.csv("Miwa_2017_JJES.csv")
# Ideal points for electoral term 47 only
ideal.point.data.2014 <- ideal.point.data[which(ideal.point.data$elec.term == 47), ,
                                          drop = FALSE]

# Inner join on MP name: keep MPs present in both data sets
Twitter.merged.data <- merge(x = ideal.point.data.2014, y = Twitter.data,
                             by = "name", all = FALSE)

Twitter.is.LDP <- Twitter.merged.data$party == "LDP"
Twitter.survey.estimate <- Twitter.merged.data$theta.1
Twitter.twitter.estimate <- Twitter.merged.data$phi

# number of available MPs
nrow(Twitter.merged.data)
# number of available LDP MPs
sum(Twitter.is.LDP, na.rm = TRUE)

# entire correlation
round(cor(Twitter.survey.estimate, Twitter.twitter.estimate), 3)
# correlation within the LDP
round(cor(Twitter.survey.estimate[Twitter.is.LDP],
          Twitter.twitter.estimate[Twitter.is.LDP]), 3)

cairo_pdf("Figure_A2.pdf", width = 3, height = 3, pointsize = 7)
par(mar = c(4, 4, 0.5, 0.5), pty = "s", lwd = 0.5)

# Colour and symbol per style slot; slot 6 is the fallback ("other").
# The same vectors feed both the points and the legend.
fig.A2.col <- c(paste0(color[12], "B0"), paste0(color[10], "B0"),
                paste0(color[2], "B0"), "#080808B0",
                paste0(color[6], "B0"), "black")
fig.A2.pch <- c(19, 15, 17, 4, 18, 1)
# Party label -> style slot (JCP and SDP share a slot)
fig.A2.slot <- c(LDP = 1, DPJ = 2, CGP = 3, JCP = 4, SDP = 4, JIP = 5)

fig.A2.party <- as.character(Twitter.merged.data$party)
fig.A2.id <- unname(fig.A2.slot[fig.A2.party])
# Known-but-unlisted parties fall back to "other"; a missing party stays NA
fig.A2.id[is.na(fig.A2.id) & !is.na(fig.A2.party)] <- 6

plot(NULL, NULL, type = "n", xlim = c(-4, 2), ylim = c(-1.6, 1.6),
     xlab = "Elite survey-based estimates",
     ylab = "Twitter-based estimates", xaxt = "n", yaxt = "n")
points(Twitter.merged.data$theta.1, Twitter.merged.data$phi,
       col = fig.A2.col[fig.A2.id],
       pch = fig.A2.pch[fig.A2.id])
legend("bottomright",
       legend = c("LDP", "DPJ", "Komeito", "JCP/SDP", "JIP", "other"),
       col = fig.A2.col,
       pch = fig.A2.pch,
       pt.cex = c(rep(1.2, 4), 1.44, 1.2))
axis(1, lwd = 0.5)
axis(2, lwd = 0.5)
dev.off()

#### validation: Nippon Kaigi affiliation (Online Appendix F.2) ####
Nippon.Kaigi.data <- read.csv("Nippon_Kaigi_data.csv")

# Inner join on MP name, then drop MPs without a first-dimension ideal point
Nippon.Kaigi.merged.data <- merge(x = ideal.point.data.2014,
                                  y = Nippon.Kaigi.data,
                                  by = "name", all = FALSE)
Nippon.Kaigi.merged.data <-
  Nippon.Kaigi.merged.data[!is.na(Nippon.Kaigi.merged.data$theta.1), ,
                           drop = FALSE]

Nippon.Kaigi.LDP.rows <- which(Nippon.Kaigi.merged.data$party == "LDP")

# number of available MPs
nrow(Nippon.Kaigi.merged.data)
# number of available LDP MPs
length(Nippon.Kaigi.LDP.rows)
# number of MPs affiliated to Nippon Kaigi
sum(Nippon.Kaigi.merged.data$kaigi)
# number of LDP MPs affiliated to Nippon Kaigi
sum(Nippon.Kaigi.merged.data$kaigi[Nippon.Kaigi.LDP.rows])

# predict Nippon Kaigi affiliation by ideal points using the entire sample
logit.result.entire <- glm(kaigi ~ theta.1, family = binomial,
                           data = Nippon.Kaigi.merged.data)
round(coef(summary(logit.result.entire)), 3)

# predict Nippon Kaigi affiliation by ideal points using the LDP sample
logit.result.LDP <- glm(kaigi ~ theta.1, family = binomial,
                        data = Nippon.Kaigi.merged.data,
                        subset = party == "LDP")
round(coef(summary(logit.result.LDP)), 3)

#### validity of manifesto-based ideal points (Online Appendix F.4) ####
# The three yearly files are read as CP932-encoded text
Catalinac.data.2003 <- read.csv(file = "2003_ideal_points.csv",
                                fileEncoding = "CP932")
Catalinac.data.2005 <- read.csv(file = "2005_ideal_points.csv",
                                fileEncoding = "CP932")
Catalinac.data.2009 <- read.csv(file = "2009_ideal_points.csv",
                                fileEncoding = "CP932")

# arrange the direction of the manifesto-based estimates:
# the sign of theta is flipped for 2005 and 2009 (2003 is left as is)
Catalinac.data.2005$theta <- Catalinac.data.2005$theta * -1
Catalinac.data.2009$theta <- Catalinac.data.2009$theta * -1

# Stack the three years, tagging each with its electoral term
Catalinac.data <- rbind(
  cbind(Catalinac.data.2003, elec.term = 43),
  cbind(Catalinac.data.2005, elec.term = 44),
  cbind(Catalinac.data.2009, elec.term = 45)
)

## combine Catalinac's (2018) replication data and this study's data
## using the information on MPs' names and districts
##
## The identifying fields are cut out of the `file` string by position:
##   id   : characters 7 .. just before the first "." found from character 7 on
##   pref : text between that "." and the first "第"
##   dist : text between the first "第" and the first "区", NFKC-normalised and
##          converted to a number
##   name : text from just after the last run of katakana (half-width checked
##          first, then full-width; if neither exists, just after "区") up to
##          the literal ".txt"
## NOTE(review): this presumes file names of the form
## "<6 chars><id>.<pref>第<dist>区<kana reading><name>.txt" -- confirm against
## the replication files.
Catalinac.file <- Catalinac.data$file

# Offset of the first "." counted from character 7 (so absolute position is
# offset + 6); computed once and reused for both id and pref.
Catalinac.dot.offset <- regexpr("\\.", substr(Catalinac.file, 7,
                                              nchar(Catalinac.file)))
Catalinac.dai.pos <- regexpr("第", Catalinac.file)
Catalinac.ku.pos <- regexpr("区", Catalinac.file)

Catalinac.data$id <- as.numeric(substr(Catalinac.file, 7,
                                       Catalinac.dot.offset + 5))
Catalinac.data$pref <- substr(Catalinac.file,
                              Catalinac.dot.offset + 7,
                              Catalinac.dai.pos - 1)
# stri_trans_nfkc() applies NFKC normalisation before as.numeric()
# (presumably to turn full-width digits into ASCII digits)
Catalinac.data$dist <- as.numeric(stri_trans_nfkc(substr(Catalinac.file,
                                                         Catalinac.dai.pos + 1,
                                                         Catalinac.ku.pos - 1)))

# End position of the last half-width / full-width katakana run (NA if none)
Catalinac.halfwidth.end <- stri_locate_last_regex(Catalinac.file, "[ｦ-ﾟ]+")[, 2]
Catalinac.fullwidth.end <- stri_locate_last_regex(Catalinac.file, "[ァ-ヴ]+")[, 2]
Catalinac.name.start <- ifelse(is.na(Catalinac.halfwidth.end),
                               ifelse(is.na(Catalinac.fullwidth.end),
                                      Catalinac.ku.pos + 1,
                                      Catalinac.fullwidth.end + 1),
                               Catalinac.halfwidth.end + 1)
# fixed = TRUE: match the literal extension ".txt". As a regex, the "." would
# match any character, so e.g. "atxt" earlier in the string would be hit first.
Catalinac.data$name <- substr(Catalinac.file,
                              Catalinac.name.start,
                              regexpr(".txt", Catalinac.file, fixed = TRUE) - 1)

# Attach the manifesto-based estimate (theta.C) to each MP of electoral terms
# before 46 by matching on term, prefecture, district number and name.
# name_correction_list.csv supplies an alternative spelling (name.C) for MPs
# whose name differs between the two sources.
#
# MPs with a missing district number are skipped (theta.C stays NA). MPs for
# whom no manifesto row is found also keep theta.C = NA and are collected in
# `unmatched.data` for inspection; previously this case stopped the script
# with "replacement has length zero".
name.correction.list <- read.csv("name_correction_list.csv")
ideal.point.data.2000s <- subset(ideal.point.data, elec.term < 46)

n.2000s <- nrow(ideal.point.data.2000s)
name.C.values <- ideal.point.data.2000s$name   # default: name used as is
theta.C.values <- rep(NA_real_, n.2000s)
no.manifesto.match <- logical(n.2000s)

for (i in seq_len(n.2000s)) {
  mp.name <- ideal.point.data.2000s$name[i]
  mp.elec.term <- ideal.point.data.2000s$elec.term[i]
  mp.pref <- ideal.point.data.2000s$pref[i]
  mp.dist <- ideal.point.data.2000s$dist[i]
  if (is.na(mp.dist)) {
    next
  }

  # Apply a name correction only when exactly one correction row applies
  correction.rows <- which(name.correction.list$name == mp.name &
                             name.correction.list$elec.term == mp.elec.term &
                             name.correction.list$pref == mp.pref &
                             name.correction.list$dist == mp.dist)
  if (length(correction.rows) == 1) {
    name.C.values[i] <- name.correction.list$name.C[correction.rows]
  }

  # Manifesto rows for the same term/district carrying the (corrected) name
  candidate.rows <- which(Catalinac.data$elec.term == mp.elec.term &
                            Catalinac.data$pref == mp.pref &
                            Catalinac.data$dist == mp.dist &
                            Catalinac.data$name == name.C.values[i])
  if (length(candidate.rows) == 0) {
    no.manifesto.match[i] <- TRUE
    next
  }
  if (length(candidate.rows) > 1) {
    # Same outcome as the implicit truncation of a length > 1 replacement,
    # but reported with enough context to track the duplicate down.
    warning("Multiple manifesto rows match '", name.C.values[i],
            "' (elec.term ", mp.elec.term, ", ", mp.pref, " ", mp.dist,
            "); using the first.", call. = FALSE)
  }
  theta.C.values[i] <- Catalinac.data$theta[candidate.rows[1]]
}

ideal.point.data.2000s$name.C <- name.C.values
ideal.point.data.2000s$theta.C <- theta.C.values
# District MPs without a manifesto-based estimate
unmatched.data <- ideal.point.data.2000s[no.manifesto.match, , drop = FALSE]

## correlation between elite survey-based and manifesto-based ideal points
# One data frame per electoral term (43, 44, 45)
ideal.point.data.2003 <-
  ideal.point.data.2000s[which(ideal.point.data.2000s$elec.term == 43), ,
                         drop = FALSE]
ideal.point.data.2005 <-
  ideal.point.data.2000s[which(ideal.point.data.2000s$elec.term == 44), ,
                         drop = FALSE]
ideal.point.data.2009 <-
  ideal.point.data.2000s[which(ideal.point.data.2000s$elec.term == 45), ,
                         drop = FALSE]

# entire sample (pairs with a missing value are dropped by `use`)
round(cor(x = ideal.point.data.2003$theta.1,
          y = ideal.point.data.2003$theta.C, use = "p"), 3)
round(cor(x = ideal.point.data.2005$theta.1,
          y = ideal.point.data.2005$theta.C, use = "p"), 3)
round(cor(x = ideal.point.data.2009$theta.1,
          y = ideal.point.data.2009$theta.C, use = "p"), 3)

# within LDP sample
LDP.mask.2003 <- ideal.point.data.2003$party == "LDP"
LDP.mask.2005 <- ideal.point.data.2005$party == "LDP"
LDP.mask.2009 <- ideal.point.data.2009$party == "LDP"
round(cor(x = ideal.point.data.2003$theta.1[LDP.mask.2003],
          y = ideal.point.data.2003$theta.C[LDP.mask.2003], use = "p"), 3)
round(cor(x = ideal.point.data.2005$theta.1[LDP.mask.2005],
          y = ideal.point.data.2005$theta.C[LDP.mask.2005], use = "p"), 3)
round(cor(x = ideal.point.data.2009$theta.1[LDP.mask.2009],
          y = ideal.point.data.2009$theta.C[LDP.mask.2009], use = "p"), 3)

cairo_pdf("Figure_A3.pdf", width = 5, height = 5, pointsize = 7)
layout(matrix(1:4, 2, 2, byrow = TRUE))
par(mar = c(4, 4, 2, 0.5), pty = "s", lwd = 0.5)

# Colour and symbol per style slot; slot 5 is the fallback ("other").
# The same vectors feed both the points and the legend of every panel.
fig.A3.col <- c(paste0(color[12], "B0"), paste0(color[10], "B0"),
                paste0(color[2], "B0"), "#080808B0", "black")
fig.A3.pch <- c(19, 15, 17, 4, 1)
# Party label -> style slot (JCP and SDP share a slot)
fig.A3.slot <- c(LDP = 1, DPJ = 2, CGP = 3, JCP = 4, SDP = 4)

# One panel per election year, drawn in this order; the name is the title
fig.A3.panels <- list("2003" = ideal.point.data.2003,
                      "2005" = ideal.point.data.2005,
                      "2009" = ideal.point.data.2009)

for (fig.A3.year in names(fig.A3.panels)) {
  fig.A3.data <- fig.A3.panels[[fig.A3.year]]
  fig.A3.party <- as.character(fig.A3.data$party)
  fig.A3.id <- unname(fig.A3.slot[fig.A3.party])
  # Known-but-unlisted parties fall back to "other"; a missing party stays NA
  fig.A3.id[is.na(fig.A3.id) & !is.na(fig.A3.party)] <- 5

  plot(NULL, NULL, type = "n", xlim = c(-4, 2), ylim = c(-2.5, 2.5),
       main = fig.A3.year, xlab = "Elite survey-based estimates",
       ylab = "Manifesto-based estimates", xaxt = "n", yaxt = "n")
  points(fig.A3.data$theta.1, fig.A3.data$theta.C,
         col = fig.A3.col[fig.A3.id],
         pch = fig.A3.pch[fig.A3.id])
  legend("bottomright",
         legend = c("LDP", "DPJ", "Komeito", "JCP/SDP", "other"),
         col = fig.A3.col,
         pch = fig.A3.pch,
         pt.cex = rep(1.2, 5))
  axis(1, lwd = 0.5)
  axis(2, lwd = 0.5)
}
dev.off()

## validation using the data of authors in conservative magazines
Seiron.data <- read.csv("Seiron_data.csv")
WiLL.data <- read.csv("WiLL_data.csv")
Voice.data <- read.csv("Voice_data.csv")

# author = 1 if the MP's name appears among the authors of Seiron, WiLL, or
# Voice, and 0 otherwise
magazine.authors <- c(Seiron.data$author, WiLL.data$author, Voice.data$author)
ideal.point.data.2009$author <-
  as.numeric(ideal.point.data.2009$name %in% magazine.authors)

LDP.rows.2009 <- ideal.point.data.2009$party == "LDP"

# number of MPs who published articles in Seiron, WiLL, or Voice
table(ideal.point.data.2009$author)
# number of LDP MPs who published articles in Seiron, WiLL, or Voice
table(ideal.point.data.2009$author[LDP.rows.2009])

# LDP-only copy, taken before the standardized columns are added so that
# each sample is standardized on its own mean and standard deviation
ideal.point.data.2009.LDP <- ideal.point.data.2009[which(LDP.rows.2009), ,
                                                   drop = FALSE]

# standardize ideal points for comparison
# -- entire sample
ideal.point.data.2009$theta.1.std <- scale(x = ideal.point.data.2009$theta.1)
ideal.point.data.2009$theta.C.std <- scale(x = ideal.point.data.2009$theta.C)
# -- LDP sample
ideal.point.data.2009.LDP$theta.1.std <-
  scale(x = ideal.point.data.2009.LDP$theta.1)
ideal.point.data.2009.LDP$theta.C.std <-
  scale(x = ideal.point.data.2009.LDP$theta.C)

# Logit models of publication in Seiron, WiLL, or Voice (author) on the
# standardized ideal points. Each fit is followed by its coefficient table
# rounded to three digits.

# --- entire sample -----------------------------------------------------------
# elite survey-based ideal points
logit.result.UTAS.entire <- glm(
  author ~ theta.1.std,
  family = binomial, data = ideal.point.data.2009
)
round(coef(summary(logit.result.UTAS.entire)), 3)
# manifesto-based ideal points
logit.result.manifesto.entire <- glm(
  author ~ theta.C.std,
  family = binomial, data = ideal.point.data.2009
)
round(coef(summary(logit.result.manifesto.entire)), 3)

# --- LDP sample --------------------------------------------------------------
# elite survey-based ideal points
logit.result.UTAS.LDP <- glm(
  author ~ theta.1.std,
  family = binomial, data = ideal.point.data.2009.LDP
)
round(coef(summary(logit.result.UTAS.LDP)), 3)
# manifesto-based ideal points
logit.result.manifesto.LDP <- glm(
  author ~ theta.C.std,
  family = binomial, data = ideal.point.data.2009.LDP
)
round(coef(summary(logit.result.manifesto.LDP)), 3)

# --- controlling for seniority: log(term + 1) added as a covariate -----------
# entire sample, elite survey-based
logit.result.UTAS.entire.senirity.controlled <- glm(
  author ~ theta.1.std + log(term + 1),
  family = binomial, data = ideal.point.data.2009
)
round(coef(summary(logit.result.UTAS.entire.senirity.controlled)), 3)
# entire sample, manifesto-based
logit.result.manifesto.entire.senirity.controlled <- glm(
  author ~ theta.C.std + log(term + 1),
  family = binomial, data = ideal.point.data.2009
)
round(coef(summary(logit.result.manifesto.entire.senirity.controlled)), 3)
# LDP sample, elite survey-based
logit.result.UTAS.LDP.senirity.controlled <- glm(
  author ~ theta.1.std + log(term + 1),
  family = binomial, data = ideal.point.data.2009.LDP
)
round(coef(summary(logit.result.UTAS.LDP.senirity.controlled)), 3)
# LDP sample, manifesto-based
logit.result.manifesto.LDP.senirity.controlled <- glm(
  author ~ theta.C.std + log(term + 1),
  family = binomial, data = ideal.point.data.2009.LDP
)
round(coef(summary(logit.result.manifesto.LDP.senirity.controlled)), 3)